
. /* ------------------------------------------------------------------------ ** 
>     C. Wunder, A. Wiencierz, J. Schwarze, H. K�chenhoff:    
>     Well-being over the life span: semiparametric evidence from British 
>     and German longitudinal data
> 
>     Data source:        British Household Panel Survey (BHPS), years 1996-2008.
>     Data organization:  person-year observations    
>     Description:        Generates and re-codes variables retrieved from the BHPS.
>                         The file "BHPS-1-Retrieval.bhps" needs to be executed 
>                         before.
>     ----------------------------------------------------------------------- 
> 
>     Description of variables:
> 
>     ls                  life satisfaction (7-point scale)
>     age                 age
>     female              female = 1; male = 0
>     ln_hhinc            log of net household income
>     ln_hhnpers          log of household size
>     bad_health          health problems: yes = 1; 
>     educ_mid            education: mid = 1; otherwise = 0 
>     educ_high           education: high = 1; otherwise = 0
>     jstat_ausb          in school = 1; otherwise = 0
>     jstat_empl          (self-)employed = 1; otherwise = 0
>     jstat_unem          unemployed = 1; otherwise = 0
>     jstat_reti          retired = 1; otherwise = 0
>     mstat2              coupled = 1; otherwise = 0
>     mstat3              widowed = 1; otherwise = 0
>     mstat4              divorced = 1; otherwise = 0
>     mstat5              separated = 1; otherwise = 0
>     mstat6              single = 1; otherwise = 0
>     attr_in_1           attrition in 1 = 1; otherwise = 0
>     attr_in_2           attrition in 2 = 1; otherwise = 0
>     attr_in_3           attrition in 3 = 1; otherwise = 0
>     period2-period10    wave indicators (year 2001 omitted)     
> ** ------------------------------------------------------------------------ */  
. version 10

. use "BHPS-long.dta", clear
(PanelWhiz-BHPS: [SWB_and_Age_1991-2008]       [23 Jul 2009/17:00:36]  john@panel)

. drop if year < 1996 | year == 2001
(195258 observations deleted)

. mvdecode _all, mv( -9=.a \ -8=.b \ -7=.c \ -6=.d \ -5=.e \ -4=.f \ -3=.g \ -2=.h \ -1=.i 
> )
         pno: 131582 missing values generated
         hid: 131582 missing values generated
       years: string variable ignored
         sex: 180 missing values generated
       ivfho: 131582 missing values generated
       ivfio: 131582 missing values generated
        dobm: 640 missing values generated
        doby: 510 missing values generated
    hrfihhmn: 4693 missing values generated
    hrfihhyr: 4693 missing values generated
     hrnkids: 3 missing values generated
    iamastat: 120 missing values generated
    irhldsbl: 16063 missing values generated
     irhlprb: 85956 missing values generated
    irhlprba: 1751 missing values generated
    irhlprbb: 1751 missing values generated
    irhlprbc: 1751 missing values generated
    irhlprbd: 1751 missing values generated
    irhlprbe: 1751 missing values generated
    irhlprbf: 1751 missing values generated
    irhlprbg: 1751 missing values generated
    irhlprbh: 1751 missing values generated
    irhlprbi: 1751 missing values generated
    irhlprbj: 1751 missing values generated
    irhlprbk: 1751 missing values generated
    irhlprbl: 1751 missing values generated
    irhlprbm: 1751 missing values generated
    irhlprbn: 1693 missing values generated
    irhlprbo: 1693 missing values generated
    irhlstat: 61 missing values generated
     irjbsat: 69673 missing values generated
    irjbstat: 101 missing values generated
    irlfsat1: 8852 missing values generated
    irlfsat2: 9120 missing values generated
    irlfsat3: 9174 missing values generated
    irlfsat5: 11222 missing values generated
    irlfsat7: 9211 missing values generated
    irlfsat8: 9201 missing values generated
    irlfsato: 9402 missing values generated
    irqfedhi: 8784 missing values generated

. 
. /*  --------( Coding of variables )---------------------------------------- */
. 
. gen         interview = (ivfio==1 | ivfio==3)

. sort        pid year

. by pid:     gen attr_in_1 = interview[_n+1]==0

. by pid:     gen attr_in_2 = interview[_n+2]==0 

. by pid:     gen attr_in_3 = interview[_n+3]==0

. replace     attr_in_1 = 0 if year==2006 | year==2000
(16320 real changes made)

. replace     attr_in_2 = 0 if year==2006 | year==2005 | year==1999
(16320 real changes made)

. replace     attr_in_3 = 0 if year==2006 | year==2005 | year==2004 | year==1998
(16320 real changes made)

. gen         age     = year - doby 
(510 missing values generated)

. gen         agesq   = age^2/10^2
(510 missing values generated)

. gen         agecub  = age^3/10^3
(510 missing values generated)

. gen         agequa  = age^4/10^4
(510 missing values generated)

. gen         ln_hhinc    = ln(hrfihhmn)      
(167650 missing values generated)

. gen         ln_hhnpers  = ln(hrhhsize)      
(162571 missing values generated)

. qui tab     irhlstat, gen(srh)              

. gen         bad_health = irhlprba==1 | irhlprbb==1 | irhlprbc==1 | irhlprbd==1 | ///
>             irhlprbe==1 | irhlprbf==1 | irhlprbg==1 | irhlprbh==1 | ///
>             irhlprbi==1 | irhlprbj==1 | irhlprbk==1 | irhlprbl==1 | /// 
>             irhlprbm==1

. qui tab     irhlprb bad_health

. gen         educ_low = irqfedhi==12 if (irqfedhi!=11 & irqfedhi!=13)
(1894 missing values generated)

. gen         educ_high = irqfedhi>=1 & irqfedhi<=4 if (irqfedhi!=11 & irqfedhi!=13)
(1894 missing values generated)

. gen         educ_mid  = irqfedhi>=5 & irqfedhi<=10 if (irqfedhi!=11 & irqfedhi!=13)
(1894 missing values generated)

. gen         jstat_empl= irjbstat==1 | irjbstat==2  | irjbstat==10

. gen         jstat_unem = irjbstat==3            

. gen         jstat_reti = irjbstat==5            

. gen         jstat_ausb = irjbstat==7            

. gen         jstat_nonw = irjbstat>=5 & (irjbstat<=6 | irjbstat==8 | irjbstat==10)

. qui tab     iamastat, gen(mstat)            

. replace     mstat1=1 if mstat7==1       
(32401 real changes made)

. gen         female = sex==2             

. ren         irlfsato ls 

. qui su      doby                    

. gen         cohort = doby - r(min)
(510 missing values generated)

. gen         cohortsq = cohort^2
(510 missing values generated)

. qui tab     year, gen(period)               

. 
. /*  --------( Define sample )---------------------------------------------- */   
. drop        if hrfihhmn<50
(699 observations deleted)

. drop        if age<18   
(33247 observations deleted)

. global      exo_vars ///
>             ln_hhinc        ln_hhnpers      female          bad_health      /// 
>             educ_mid        educ_high       jstat_ausb      jstat_empl      ///
>             jstat_unem      jstat_reti      mstat2          mstat3          ///
>             mstat4          mstat5          mstat6          attr_in_1       ///
>             attr_in_2       attr_in_3       period2         period3         ///
>             period4         period5         period6         period7         /// 
>             period8         period9         period10        

. tokenize    ${exo_vars}

. global      gls_exo_vars ///
>             gls_`1'  gls_`2'  gls_`3'  gls_`4'  gls_`5'  gls_`6'  gls_`7'  gls_`8'  ///
>             gls_`9'  gls_`10' gls_`11' gls_`12' gls_`13' gls_`14' gls_`15' gls_`16' ///
>             gls_`17' gls_`18' gls_`19' gls_`20' gls_`21' gls_`22' gls_`23' gls_`24' ///
>             gls_`25' gls_`26' gls_`27' gls_one

. global      exo_vars_ac ///     
>             ln_hhinc        ln_hhnpers      female          bad_health      ///
>             educ_mid        educ_high       jstat_ausb      jstat_empl      ///
>             jstat_unem      jstat_reti      mstat2          mstat3          ///
>             mstat4          mstat5          mstat6          attr_in_1       ///
>             attr_in_2       attr_in_3       cohort          cohortsq

. tokenize    ${exo_vars_ac}

. global      gls_exo_vars_ac ///
>             gls_`1'  gls_`2'  gls_`3'  gls_`4'  gls_`5'  gls_`6'  gls_`7'  gls_`8'  ///
>             gls_`9'  gls_`10' gls_`11' gls_`12' gls_`13' gls_`14' gls_`15' gls_`16' ///
>             gls_`17' gls_`18' gls_`19' gls_`20' 

. * drop observations with missing values on key variables
. local vars hrfihhmn hrfihhyr hrhhsize hrnkids iamastat irjbstat ls irqfedhi ${exo_vars} y
> ear age    

. foreach var of local vars {         
  2. drop if `var'>=.
  3. }
(151027 observations deleted)
(0 observations deleted)
(0 observations deleted)
(3 observations deleted)
(716 observations deleted)
(6198 observations deleted)
(6906 observations deleted)
(1843 observations deleted)
(0 observations deleted)
(0 observations deleted)
(0 observations deleted)
(0 observations deleted)
(1125 observations deleted)
(0 observations deleted)
(0 observations deleted)
(0 observations deleted)
(0 observations deleted)
(0 observations deleted)
(0 observations deleted)
(0 observations deleted)
(0 observations deleted)
(0 observations deleted)
(0 observations deleted)
(0 observations deleted)
(0 observations deleted)
(0 observations deleted)
(0 observations deleted)
(0 observations deleted)
(0 observations deleted)
(0 observations deleted)
(0 observations deleted)
(0 observations deleted)
(0 observations deleted)
(0 observations deleted)
(0 observations deleted)
(0 observations deleted)
(10 observations deleted)

. keep        pid year ls age* ${exo_vars} year doby cohort*  

. 
. /*  --------( Define variable labels )------------------------------------- */
. label var   ln_hhinc "log of net household income"

. label var   ln_hhnpers "log of household size"

. label var   bad_health "health problems"

. label var   educ_high "education: high"

. label var   educ_mid "education: mid"

. label var   jstat_empl "(self-)employed"

. label var   jstat_unem "unemployed"

. label var   jstat_reti "retired"

. label var   jstat_ausb "in school"

. label var   age "age"

. label var   agesq "age squared/10^2"

. label var   agecub "age cubed/10^3"

. label var   agequa "age quartic/10^4"

. label var   female "sex: female"

. label var   mstat2 "coupled"

. label var   mstat3 "widowed"

. label var   mstat4 "divorced"

. label var   mstat5 "separated"

. label var   mstat6 "single"

. label var   period1 "1996"

. label var   period2 "1997"

. label var   period3 "1998"

. label var   period4 "1999"

. label var   period5 "2000"

. label var   period6 "2002"

. label var   period7 "2003"

. label var   period8 "2004"

. label var   period9 "2005"

. label var   period10 "2006"

. xtset pid year
       panel variable:  pid (unbalanced)
        time variable:  year, 1996 to 2006, but with gaps
                delta:  1 unit

. save "BHPS-raw.dta", replace
file BHPS-raw.dta saved

. 
. /*  --------( GLS-transformation )---------------------------------------- */
. use "BHPS-raw.dta", clear
(PanelWhiz-BHPS: [SWB_and_Age_1991-2008]       [23 Jul 2009/17:00:36]  john@panel)

. qui xtreg ls age agesq agecub $exo_vars

. scalar sig_e = e(sigma_e)

. scalar sig_u = e(sigma_u)

. gen one=1

. bysort pid: egen T_i = count(ls)    

. gen double theta_i = 1 - sig_e / (sqrt(sig_e^2 + T_i*sig_u^2))

. global all_vars $exo_vars ls age agesq agecub one

. foreach var of global all_vars{
  2.     bysort pid: egen im_`var' = mean(`var')
  3.     gen gls_`var' = `var' - theta_i*im_`var'
  4. }

. drop im*

. foreach var of global gls_exo_vars{
  2.     gen _`var' = `var'
  3. }

. sort pid year

. save "BHPS.dta", replace
file BHPS.dta saved

. qui log close
